***
* Pull data on GEO/EIN Movers & Characterize GEO/EIN Stickiness

** Prep main data **;
libname secure "";
libname data "";


data secure.df; set secure.sermbrphus_append  ; run;
data secure.df; set secure.df; if year(dob) >= 1935 & year(dob) <= 1945; run;  	
data secure.df; set secure.df; retire_age = retire1; 
				 claim_age = year(doei) + (month(doei) - 1)/12 - year(dob) - (month(dob) - 1)/12; run;


** Pull movers data **;
proc sort data=secure.df out=secure.df ;
	by ssn ;
run;

%macro make_data(name) ;
proc sort data=secure.&name. out=secure.&name. ;
	by ssn ;
run;

data data.&name. ;
	merge 

		secure.df (	in=in_df 
					keep = ssn dob)
		secure.&name. (in=mover
					keep = ssn year &name.
					rename = (year=&name._year))
	;
	by ssn ;
	if mover = 1 & in_df = 1 ;

run;
%mend make_data ;

%make_data(geo);  
%make_data(ein);

** Find location at age 60 **;
%MACRO track_at_60(NAME);
	data &NAME._60; set data.&NAME.; where &NAME._year - year(dob) <= 60; run;
	proc sort data = &NAME._60; by ssn descending &NAME._year; run;	
	data &NAME._60; set &NAME._60; by ssn; if first.ssn; run;
	data &NAME._60 (keep = SSN &NAME._60); set &NAME._60; &NAME._60 = &NAME.; run;
%MEND;
%track_at_60(geo);
%track_at_60(ein);


** Identify movers vs stayers **;
%MACRO identify_movers_stayers(NAME);
	* Tag eligible moves and movers;
	data &NAME.; set data.&NAME.; 
		move_age = &NAME._year - year(dob); 
		move_row = (move_age >= 50 & move_age <= 60 & ssn = lag(ssn));
		if move_row = 1 then lag_&NAME. = lag(&NAME.); run;
	proc sql noprint;
		CREATE TABLE is_mover AS SELECT SSN, max(move_row) AS is_mover
			FROM &NAME. GROUP BY SSN;
		CREATE TABLE &NAME._x AS SELECT * FROM &NAME. A
			LEFT JOIN is_mover B ON A.SSN = B.SSN
			ORDER BY SSN, &NAME._year;
		* Pull relevant row for each mover;
		CREATE TABLE &NAME._movers AS SELECT * FROM
			(SELECT SSN, move_age, lag_&NAME., &NAME. FROM &NAME._x WHERE move_row = 1) A
			LEFT JOIN &NAME._60 B ON A.SSN = B.SSN;
		* Identify SSN of everyone who is not an eligible mover;
		CREATE TABLE &NAME._stayers AS SELECT * FROM
			(SELECT SSN FROM &NAME._x WHERE is_mover = 0) A
			LEFT JOIN &NAME._60 B ON A.SSN = B.SSN;
	* Take only the first row for movers;
	proc sort data = &NAME._movers; by SSN descending move_age; run;
	data &NAME._movers; set &NAME._movers; by ssn; if first.ssn; run;
	* Merge movers info;
	proc sql noprint;
		CREATE TABLE &NAME._movers AS SELECT * FROM &NAME._movers A
			LEFT JOIN
			(SELECT SSN, sex, race, dob, retire1, claim_age FROM secure.df) B
			ON A.SSN = B.SSN;
%MEND;
%identify_movers_stayers(geo);
%identify_movers_stayers(ein);


** Exports movers data **;
%MACRO export_movers(NAME);
	proc export data = &NAME._movers dbms = csv replace
		outfile = "&NAME._movers.csv";
	run;
%MEND;
%export_movers(geo);
%export_movers(ein);


** Export key stats for areas from non-movers **;
%MACRO export_stayer_means(NAME);
	proc sql noprint;
		* Pull data on list of stayers;
		CREATE TABLE &NAME._avg_xx AS 
			SELECT * FROM &NAME._stayers A
			LEFT JOIN secure.df B ON A.SSN = B.SSN;
		* Create relevant variables to categorize areas;
		CREATE TABLE &NAME._avg_x AS 
			SELECT SSN, &NAME._60, retire_age, claim_age,
				(CASE WHEN year(dob) <= 1937 THEN 0 WHEN year(dob) >= 1943 THEN 1 ELSE . END) AS new_fra,
				(retire_age >= 64.5 & retire_age <= 65.5) AS retire_age_65,  
				(retire_age >= 65.5 & retire_age <= 66.5) AS retire_age_66,
				(claim_age >= 64.5 & claim_age <= 65.5) AS claim_age_65,  
				(claim_age >= 65.5 & claim_age <= 66.5) AS claim_age_66
			FROM &NAME._avg_xx;
		* Collapse list of stayers;
		CREATE TABLE &NAME._avg AS SELECT &NAME._60, new_fra,
			mean(retire_age) AS mean_retire_age, 
			mean(retire_age_65) AS mean_retire_age_65,
			mean(retire_age_66) AS mean_retire_age_66,
			mean(claim_age) AS claim_age_age, 
			mean(claim_age_65) AS mean_claim_age_65,
			mean(claim_age_66) AS mean_claim_age_66
			FROM &NAME._avg_x WHERE new_fra ~= .
			GROUP BY &NAME._60, new_fra;
	proc export data = &NAME._avg dbms = csv replace
		outfile = "&NAME._avg.csv";
	run;
%MEND;
%export_stayer_means(geo);
%export_stayer_means(ein);

run; quit;
